#install.packages(c("forecast", "expsmooth", "seasonal")) 
library(TTR)
library(forecast) 
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(tseries) 
library(expsmooth) 
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2 3.4.2     ✔ fma     2.5
## 
library(seasonal)
library(MASS)
## 
## Attaching package: 'MASS'
## The following objects are masked from 'package:fma':
## 
##     cement, housing, petrol
library(stats)
library(TSA)
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(forecast)
library(ggplot2)
library(tseries)
library(imputeTS)
## 
## Attaching package: 'imputeTS'
## The following object is masked from 'package:tseries':
## 
##     na.remove
library(vars)
## Loading required package: strucchange
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following object is masked from 'package:imputeTS':
## 
##     na.locf
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: urca
## Loading required package: lmtest
library(timetk)
library(Metrics)
## 
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
## 
##     accuracy
library(lmtest)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(xts)
library(dplyr)
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
## 
##     first, last
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite

Step 1: Load data and plot the time series

path <- "/Users/aashishsingh/Desktop/Time Series - Final Project/"
pm2_5_data <- read.csv(paste0(path, "la_pm2_5_pollutant_data.csv"), 
                     na.strings=c("", "NA"))
ozone_data <- read.csv(paste0(path, "la_ozone_pollutant_data.csv"), 
                     na.strings=c("", "NA"))

# Convert data into ts objects
pm2_5_ts <- ts(pm2_5_data$PM2.5.AQI.Value, start = c(1999,1,1), frequency = 365.25)
ozone_ts <- ts(ozone_data$Ozone.AQI.Value, start = c(1999,1,1), frequency = 365.25)

plot(pm2_5_ts, main="Los Angeles PM 2.5 AQI")

plot(ozone_ts, main="Los Angeles Ozone AQI")

Step 2: Check if data is stationary (KPSS/ADF Test)

kpss.test(pm2_5_ts)
## Warning in kpss.test(pm2_5_ts): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  pm2_5_ts
## KPSS Level = 6.951, Truncation lag parameter = 12, p-value = 0.01
# The reported p-value is 0.01, which is smaller than 0.05, and would suggest 
# that we reject the null hypothesis of level stationarity and conclude that 
# there is evidence that the data is non-stationary

adf.test(pm2_5_ts)
## Warning in adf.test(pm2_5_ts): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  pm2_5_ts
## Dickey-Fuller = -14.554, Lag order = 20, p-value = 0.01
## alternative hypothesis: stationary
# Similarly the Augumented Dickey-Fuller test results in p-value of 0.01 (<0.05) 
# where we do reject the null hypothesis that the time series is non-stationary
# and thus data is stationary.

# These are opposite results so we use ACF PACF plots to see if the series is stationary
acf(pm2_5_ts, main = "ACF of Original Time Series")

pacf(pm2_5_ts, main = "PACF of Original Time Series")

#### Step 3: Prepare data for Weekly & Monthly extraction and EDA

# Create data with Dates
pm2_5_df <- as.data.frame(pm2_5_ts)
pm2_5_df$Date <- mdy(pm2_5_data$Date)
colnames(pm2_5_df) <- c("PM2.5.AQI.Value", "Date")

# Convert to xts format for weekly & monthly
pm2_5_xts <- as.xts(pm2_5_df, order.by=pm2_5_df$Date)
pm2_5_xts <- pm2_5_xts[,-2]

Step 4: Understand PM2.5 AQI variance across years

# Let's see how seasonality looks over time and is the variance changing
pm2_5_df$Month <- month(pm2_5_df$Date)
pm2_5_df$Year <- year(pm2_5_df$Date)

avg_aqi_by_month_year <- pm2_5_df %>%
  group_by(pm2_5_df$Year, pm2_5_df$Month) %>%
  summarise(
    avg_value = mean(PM2.5.AQI.Value)
  )
## `summarise()` has grouped output by 'pm2_5_df$Year'. You can override using the
## `.groups` argument.
colnames(avg_aqi_by_month_year) <- c("Year", "Month", "avg_value")

reshape_avg_aqi_by_month_year <- 
  sqldf(
    "SELECT 
      Month,
      MAX(CASE WHEN Year = 1999 THEN avg_value END) AS Year_1999,
      MAX(CASE WHEN Year = 2000 THEN avg_value END) AS Year_2000,
      MAX(CASE WHEN Year = 2001 THEN avg_value END) AS Year_2001,
      MAX(CASE WHEN Year = 2002 THEN avg_value END) AS Year_2002,
      MAX(CASE WHEN Year = 2003 THEN avg_value END) AS Year_2003,
      MAX(CASE WHEN Year = 2004 THEN avg_value END) AS Year_2004,
      MAX(CASE WHEN Year = 2005 THEN avg_value END) AS Year_2005,
      MAX(CASE WHEN Year = 2006 THEN avg_value END) AS Year_2006,
      MAX(CASE WHEN Year = 2007 THEN avg_value END) AS Year_2007,
      MAX(CASE WHEN Year = 2008 THEN avg_value END) AS Year_2008,
      MAX(CASE WHEN Year = 2009 THEN avg_value END) AS Year_2009,
      MAX(CASE WHEN Year = 2010 THEN avg_value END) AS Year_2010,
      MAX(CASE WHEN Year = 2011 THEN avg_value END) AS Year_2011,
      MAX(CASE WHEN Year = 2012 THEN avg_value END) AS Year_2012,
      MAX(CASE WHEN Year = 2013 THEN avg_value END) AS Year_2013,
      MAX(CASE WHEN Year = 2014 THEN avg_value END) AS Year_2014,
      MAX(CASE WHEN Year = 2015 THEN avg_value END) AS Year_2015,
      MAX(CASE WHEN Year = 2016 THEN avg_value END) AS Year_2016,
      MAX(CASE WHEN Year = 2017 THEN avg_value END) AS Year_2017,
      MAX(CASE WHEN Year = 2018 THEN avg_value END) AS Year_2018,
      MAX(CASE WHEN Year = 2019 THEN avg_value END) AS Year_2019,
      MAX(CASE WHEN Year = 2020 THEN avg_value END) AS Year_2020,
      MAX(CASE WHEN Year = 2021 THEN avg_value END) AS Year_2021,
      MAX(CASE WHEN Year = 2022 THEN avg_value END) AS Year_2022,
      MAX(CASE WHEN Year = 2023 THEN avg_value END) AS Year_2023
    FROM avg_aqi_by_month_year
    GROUP BY Month
    ORDER BY Month"
  )

colnames(reshape_avg_aqi_by_month_year) <- c(
  "Month", "1999", "2000", "2001", "2002", "2003", "2004", "2005", "2006", 
  "2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", 
  "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023")

boxplot(reshape_avg_aqi_by_month_year[2:26])

# We can see that over the years there is downward trend and variance is decreasing
# except 2020 when we see a high peak likely caused by wildfires
# Link: https://en.wikipedia.org/wiki/Lake_Fire_(2020)

Step 5a: Extract values for time series and plot the series - Weekly

pm2_5_weekly <- apply.weekly(pm2_5_xts, mean)
pm2_5_weekly_ts <- ts(pm2_5_weekly["19990103/20230811"], 
                      start = c(1999,1),
                      frequency = 52.18)
plot(pm2_5_weekly_ts)

# Strong seasonality, Strong cyclic, light downward trend, variance is reducing

kpss.test(pm2_5_weekly_ts)
## Warning in kpss.test(pm2_5_weekly_ts): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  pm2_5_weekly_ts
## KPSS Level = 3.4904, Truncation lag parameter = 7, p-value = 0.01
adf.test(pm2_5_weekly_ts)
## Warning in adf.test(pm2_5_weekly_ts): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  pm2_5_weekly_ts
## Dickey-Fuller = -8.4636, Lag order = 10, p-value = 0.01
## alternative hypothesis: stationary
acf(pm2_5_weekly_ts, main = "ACF of Weekly Time Series")

pacf(pm2_5_weekly_ts, main = "PACF of Weekly Time Series")

Step 6: Split data into train and test - Weekly

# Split the data into a training and test dataset
train_weekly <- window(pm2_5_weekly_ts, start = c(1999,1), end=c(2020,52))
test_weekly <- window(pm2_5_weekly_ts, start = c(2021,1))

Step 7: Decompose the time series plot - Weekly

# Looking at spectral analysis
periodogram(train_weekly, log = "no", plot=TRUE, 
            ylab = "Periodogram",
            xlab = "Frequency",
            lwd=2, xlim = c(0, 0.06))

# There are two trends: 
#  1) A typical yearly (52 weeks) seasonal trend
#  2) A trend that is repeating every 9.6 years (500 weeks)
#  3) A typical half yearly (26 weeks) seasonal trend

# Overall, there is no mixed effect that normal seasonality model cannot capture.

# Decompose the series
plot(decompose(train_weekly, type="multiplicative"))

# Important Inferences
# 1) The PM2.5 AQI has been decreasing overall though there are rise every now and then.
#    However the trend is going down with time.
# 2) Winter months have a strong seasonal effect with Nov and Dec being the peak months
#    usually which likely could be due to cold temperatures causing pollutant to
#    not escape the lower atmosphere.
#    Link: https://www.accuweather.com/en/health-wellness/why-air-pollution-is-worse-in-winter/689434#:~:text=Cold%20air%20is%20denser%20and%20moves%20slower%20than%20warm%20air,rate%20than%20during%20the%20summer.
# 3) We can see a seasonal cycle of 12 months where the mean value of each month starts 
#    with a increasing trend in the beginning of the year and drops down towards 
#    the end of the year. We can see a seasonal effect with a cycle of 12 months.


# Understand the seasonality and remove it to see the trend
train_weekly_diff <- diff(train_weekly, lag = 52)
autoplot(train_weekly_diff, ylab="Train Seasonal Differencing - Weekly Data")

# Let's look if the series is stationary?
kpss.test(train_weekly_diff)
## Warning in kpss.test(train_weekly_diff): p-value greater than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  train_weekly_diff
## KPSS Level = 0.17702, Truncation lag parameter = 7, p-value = 0.1
# The reported p-value is 0.1, which is > 0.05, and would suggest that we fail 
# to reject the null hypothesis of level stationarity (conclusion: stationary)

adf.test(train_weekly_diff)
## Warning in adf.test(train_weekly_diff): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train_weekly_diff
## Dickey-Fuller = -8.5534, Lag order = 10, p-value = 0.01
## alternative hypothesis: stationary
# The reported p-value of 0.01 (<0.05) so we do reject the null hypothesis that 
# the time series is non-stationary (conclusion: stationary)

acf(train_weekly_diff, main = "ACF of Seasonal Differencing Time Series - Weekly Data")

pacf(train_weekly_diff, main = "PACF of Seasonal Differencing Time Series - Weekly Data")

Step 8: Benchmark using snaive model - Weekly

# Forecast the seasonal naïve for (2021-01 to 2023-07)
h <- length(test_weekly)
forecast_snaive_weekly <- snaive(train_weekly, lambda="auto", h)
## Warning in lag.default(y, -lag): 'k' is not an integer
summary(forecast_snaive_weekly)
## 
## Forecast method: Seasonal naive method
## 
## Model Information:
## Call: snaive(y = train_weekly, h = h, lambda = "auto") 
## 
## Residual sd: 0.0242 
## 
## Error measures:
##                    ME     RMSE      MAE       MPE     MAPE    MASE      ACF1
## Training set -0.98655 22.24374 16.33231 -5.891141 23.59252 1.01058 0.2606254
## 
## Forecasts:
##          Point Forecast    Lo 80     Hi 80    Lo 95      Hi 95
## 2020.982       56.85714 41.91737  82.45329 36.38049  104.34805
## 2021.001       57.57143 42.35683  83.74534 36.73118  106.22953
## 2021.020       86.42857 59.04383 141.33902 49.72676  197.06989
## 2021.039       63.85714 46.16399  95.38780 39.75018  123.49505
## 2021.058       77.14286 53.88411 121.63452 45.77199  164.46120
## 2021.077       76.00000 53.23623 119.28771 45.27157  160.68347
## 2021.097       62.42857 45.30797  92.69863 39.07432  119.45737
## 2021.116       54.85714 40.67917  78.86895 35.38988   99.16489
## 2021.135       59.28571 43.40573  86.87194 37.56634  110.81117
## 2021.154       66.57143 47.77598 100.56769 41.01838  131.35626
## 2021.173       53.28571 39.69817  76.08710 34.60237   95.17915
## 2021.192       47.85714 36.25179  66.70892 31.81643   81.98215
## 2021.212       32.42857 25.90793  42.00295 23.25949   49.03259
## 2021.231       34.14286 27.10198  44.60602 24.26337   52.37635
## 2021.250       31.71429 25.40676  40.92885 22.83680   47.66194
## 2021.269       47.85714 36.25179  66.70892 31.81643   81.98215
## 2021.288       29.14286 23.58410  37.11350 21.29269   42.83659
## 2021.307       42.71429 32.89937  58.15415 29.07649   70.27037
## 2021.327       53.71429 39.96644  76.84279 34.81796   96.25867
## 2021.346       62.57143 45.39381  92.96640 39.14217  119.85808
## 2021.365       63.42857 45.90774  94.57837 39.54804  122.27659
## 2021.384       48.00000 36.34366  66.95113 31.89110   82.31830
## 2021.403       51.71429 38.70984  73.33545 33.80654   91.26857
## 2021.422       62.57143 45.39381  92.96640 39.14217  119.85808
## 2021.442       51.85714 38.80000  73.58436 33.87923   91.62100
## 2021.461       49.57143 37.35000  69.63172 32.70750   86.05525
## 2021.480       56.28571 41.56477  81.42417 36.09877  102.85442
## 2021.499       56.00000 41.38812  80.91112 35.95752  102.11143
## 2021.518       92.42857 62.28435 154.68600 52.18356  220.06415
## 2021.537       65.57143 47.18426  98.64854 40.55354  128.43081
## 2021.557       54.85714 40.67917  78.86895 35.38988   99.16489
## 2021.576       56.00000 41.38812  80.91112 35.95752  102.11143
## 2021.595       62.42857 45.30797  92.69863 39.07432  119.45737
## 2021.614       61.85714 44.96407  91.63010 38.80232  117.86128
## 2021.633       78.42857 54.60951 124.29503 46.33127  168.77115
## 2021.652      109.00000 70.88950 194.16084 58.61222  292.41376
## 2021.672       59.57143 43.57976  87.39657 37.70465  111.58392
## 2021.691       69.42857 49.45295 106.12060 42.33153  139.90588
## 2021.710      160.71429 95.06826 344.85537 76.00938  632.48436
## 2021.729      122.14286 77.38932 228.33334 63.38140  360.41061
## 2021.748       76.71429 53.64150 120.75248 45.58470  163.03871
## 2021.767       89.42857 60.67289 147.95126 50.96437  208.36966
## 2021.787       89.28571 60.59572 147.63363 50.90586  207.82273
## 2021.806       70.42857 50.03520 108.08860 42.78601  142.96626
## 2021.825       69.57143 49.53628 106.40097 42.39661  140.34089
## 2021.844      105.71429 69.22130 186.02114 57.37643  276.95843
## 2021.863       72.42857 51.19258 112.06289 43.68722  149.19492
## 2021.882       70.85714 50.28401 108.93593 42.97999  144.28880
## 2021.901       76.14286 53.31738 119.58013 45.33429  161.15297
## 2021.921       76.42857 53.47953 120.16578 45.45959  162.09428
## 2021.940       91.57143 61.82570 152.74921 51.83706  216.68182
## 2021.959       76.42857 53.47953 120.16578 45.45959  162.09428
## 2021.978       75.71429 46.70296 149.07265 37.88592  248.23269
## 2021.997       56.85714 37.48202  98.85512 31.14606  145.55294
## 2022.016       57.57143 37.84977 100.58100 31.41990  148.75461
## 2022.036       86.42857 51.54491 182.27547 41.32667  327.59627
## 2022.055       63.85714 41.01971 116.34854 33.76246  179.06166
## 2022.074       77.14286 47.36376 153.29278 38.35930  257.79208
## 2022.093       76.00000 46.83551 149.91174 37.98097  250.12146
## 2022.112       62.42857 40.30943 112.67226 33.24031  171.82380
## 2022.131       54.85714 36.44375  94.09257 30.37055  136.83505
## 2022.151       59.28571 38.72596 104.77733 32.07058  156.63381
## 2022.170       66.57143 42.35346 123.48738 34.73876  193.41882
## 2022.189       53.28571 35.61892  90.42211 29.75192  130.23291
## 2022.208       47.85714 32.70513  78.21637 27.54798  109.00281
## 2022.227       32.42857 23.79787  47.35177 20.61581   60.23920
## 2022.246       34.14286 24.83937  50.51190 21.44271   64.90800
## 2022.266       31.71429 23.35962  46.05439 20.26647   58.34404
## 2022.285       47.85714 32.70513  78.21637 27.54798  109.00281
## 2022.304       29.14286 21.76016  41.47747 18.98437   51.75918
## 2022.323       42.71429 29.84588  67.31288 25.35599   90.97243
## 2022.342       53.71429 35.84468  91.41695 29.92147  132.01234
## 2022.361       62.57143 40.38072 113.03740 33.29279  172.53798
## 2022.381       63.42857 40.80724 115.23984 33.60643  176.86777
## 2022.400       48.00000 32.78314  78.52830 27.60737  109.53157
## 2022.419       51.71429 34.78591  86.81381 29.12482  123.84111
## 2022.438       62.57143 40.38072 113.03740 33.29279  172.53798
## 2022.457       51.85714 34.86198  87.13928 29.18219  124.41367
## 2022.476       49.57143 33.63639  81.99235 28.25556  115.45206
## 2022.496       56.28571 37.18667  97.48391 30.92582  143.02534
## 2022.515       56.00000 37.03860  96.80146 30.81530  141.77266
## 2022.534       92.42857 54.14805 202.50673 43.15061  380.82128
## 2022.553       65.57143 41.86445 120.83362 34.38143  188.03493
## 2022.572       54.85714 36.44375  94.09257 30.37055  136.83505
## 2022.591       56.00000 37.03860  96.80146 30.81530  141.77266
## 2022.611       62.42857 40.30943 112.67226 33.24031  171.82380
## 2022.630       61.85714 40.02368 111.21719 33.02981  168.98806
## 2022.649       78.42857 47.95435 157.14406 38.78134  266.64735
## 2022.668      109.00000 60.97972 265.15750 47.85638  571.69175
## 2022.687       59.57143 38.87112 105.48421 32.17814  157.97432
## 2022.706       69.42857 43.73580 131.22396 35.74498  209.43402
## 2022.726      160.71429 79.60555 543.52771 60.15023 2148.83596
## 2022.745      122.14286 66.06609 322.79068 51.28807  787.40047
## 2022.764       76.71429 47.16603 152.02024 38.21778  254.89376
## 2022.783       89.42857 52.85570 192.23796 42.24730  353.32762
## 2022.802       89.28571 52.79371 191.75668 42.20386  352.06343
## 2022.821       70.42857 44.21454 133.98653 36.09214  215.26899
## 2022.841       69.57143 43.80435 131.61686 35.79473  210.26013
## 2022.860      105.71429 59.66423 251.89507 46.95907  527.77153
## 2022.879       72.42857 45.16431 139.59826 36.77889  227.31238
## 2022.898       70.85714 44.41892 135.17929 36.24014  217.80734
## 2022.917       76.14286 46.90171 150.33220 38.02843  251.07014
## 2022.936       76.42857 47.03397 151.17499 38.12320  252.97615
## 2022.956       91.57143 53.78066 199.54105 42.89425  372.77808
## 2022.975       76.42857 42.88434 184.62664 33.68780  394.19440
## 2022.994       75.71429 42.59841 181.74281 33.49277  384.64280
## 2023.013       56.85714 34.56252 115.27267 27.89433  198.13893
## 2023.032       57.57143 34.88572 117.47836 28.12408  203.38217
## 2023.051       86.42857 46.76444 228.20990 36.30785  555.92271
## 2023.071       63.85714 37.66206 137.89356 30.08139  254.81833
## 2023.090       77.14286 43.16906 187.53972 33.88173  403.97901
## 2023.109       76.00000 42.71293 182.89285 33.57091  388.43591
## 2023.128       62.42857 37.04145 133.09145 29.64635  242.23709
## 2023.147       54.85714 33.64877 109.21610 27.24258  184.04590
## 2023.166       59.28571 35.65482 122.86529 28.66920  216.44036
## 2023.186       66.57143 38.82520 147.29098 30.89294  280.33055
## 2023.205       53.28571 32.92151 104.57864 26.72150  173.55208
## 2023.224       47.85714 30.34259  89.35009 24.85644  140.85442
## 2023.243       32.42857 22.35770  52.23081 18.89607   71.88759
## 2023.262       34.14286 23.29974  55.93504 19.61501   78.11108
## 2023.281       31.71429 21.96060  50.71676 18.59166   69.38508
## 2023.300       47.85714 30.34259  89.35009 24.85644  140.85442
## 2023.320       29.14286 20.50772  45.40721 17.47087   60.79786
## 2023.339       42.71429 27.79659  76.00339 22.98765  114.34569
## 2023.358       53.71429 33.12068 105.83295 26.86441  176.36524
## 2023.377       62.57143 37.10378 133.56727 29.69011  243.47020
## 2023.396       63.42857 37.47650 136.44265 29.95146  250.98500
## 2023.415       48.00000 30.41183  89.73553 24.90687  141.64936
## 2023.435       51.71429 32.18580 100.04566 26.19221  163.53958
## 2023.454       62.57143 37.10378 133.56727 29.69011  243.47020
## 2023.473       51.85714 32.25304 100.45349 26.24067  164.43057
## 2023.492       49.57143 31.16850  94.02920 25.45671  150.61814
## 2023.511       56.28571 34.30278 113.52437 27.70940  194.02521
## 2023.530       56.00000 34.17251 112.65560 27.61655  191.99483
## 2023.550       92.42857 48.98985 257.47722 37.78903  685.11752
## 2023.569       65.57143 38.39908 143.78651 30.59619  270.67668
## 2023.588       54.85714 33.64877 109.21610 27.24258  184.04590
# Plot the forecasts for snaive model
plot(forecast_snaive_weekly, main = "PM 2.5 AQI Forecast - SNaive (Weekly)",
         xlab = "Week", ylab = "PM 2.5 AQI")
lines(test_weekly)

# Compare the forecast with the actual test data by calculating MAPE and MSE
# Symmetric Mean Absolute Percentage Error (sMAPE)
smape_snaive_weekly <- smape(test_weekly, forecast_snaive_weekly$mean)
smape_snaive_weekly
## [1] 0.2411123
# Mean Absolute Error (MAE)
mae_snaive_weekly <- mean(abs((test_weekly - forecast_snaive_weekly$mean)))
mae_snaive_weekly
## [1] 15.78571
# Root Mean Squared Error (RMSE)
rmse_snaive_weekly <- rmse(test_weekly, forecast_snaive_weekly$mean)
rmse_snaive_weekly
## [1] 23.2872
# Evaluate the residuals
checkresiduals(forecast_snaive_weekly)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 556.9, df = 104.36, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 104.36

Step 9: Forecast using SARIMA - Weekly

eacf <-
function (z,ar.max=7,ma.max=13) 
{
#
#  PROGRAMMED BY K.S. CHAN, DEPARTMENT OF STATISTICS AND ACTUARIAL SCIENCE,
#  UNIVERSITY OF IOWA.
#
#  DATE: 4/2001
#  Compute the extended sample acf (ESACF) for the time series stored in z.
#  The matrix of ESACF with the AR order up to ar.max and the MA order
#  up to ma.max is stored in the matrix EACFM.
#  The default values for NAR and NMA are 7 and 13 respectively.
#  Side effect of the eacf function:
#  The function prints a coded ESACF table with
#  significant values denoted by * and nosignificant values by 0, significance
#  level being 5%.
#
#  Output:
#   eacf=matrix of esacf
#   symbol=matrix of coded esacf
#

lag1<-function(z,lag=1){c(rep(NA,lag),z[1:(length(z)-lag)])}
reupm<-function(m1,nrow,ncol){
k<-ncol-1
m2<-NULL
for (i in 1:k){
i1<-i+1
work<-lag1(m1[,i])
work[1]<--1
temp<-m1[,i1]-work*m1[i1,i1]/m1[i,i]
temp[i1]<-0
m2<-cbind(m2,temp)
}
m2}
ceascf<-function(m,cov1,nar,ncol,count,ncov,z,zm){
result<-0*seq(1,nar+1)
result[1]<-cov1[ncov+count]
for (i in 1:nar) {
temp<-cbind(z[-(1:i)],zm[-(1:i),1:i])%*%c(1,-m[1:i,i])
result[i+1]<-acf(temp,plot=FALSE,lag.max=count,drop.lag.0=FALSE)$acf[count+1]
}
result
}

ar.max<-ar.max+1
ma.max<-ma.max+1
nar<-ar.max-1
nma<-ma.max
ncov<-nar+nma+2
nrow<-nar+nma+1
ncol<-nrow-1
z<-z-mean(z)
zm<-NULL
for(i in 1:nar) zm<-cbind(zm,lag1(z,lag=i))
cov1<-acf(z,lag.max=ncov,plot=FALSE,drop.lag.0=FALSE)$acf
cov1<-c(rev(cov1[-1]),cov1)
ncov<-ncov+1
m1<-matrix(0,ncol=ncol,nrow=nrow)
for(i in 1:ncol) m1[1:i,i]<-
ar.ols(z,order.max=i,aic=FALSE,demean=FALSE,intercept=FALSE)$ar
eacfm<-NULL
for (i in 1:nma) {
m2<-reupm(m1=m1,nrow=nrow,ncol=ncol)
ncol<-ncol-1
eacfm<-cbind(eacfm, ceascf(m2,cov1,nar,ncol,i,ncov,z,zm))
m1<-m2}
work<-1:(nar+1)
work<-length(z)-work+1
symbol<-NULL
for ( i in 1:nma) {
work<-work-1
symbol<-cbind(symbol,ifelse(abs(eacfm[,i])>2/work^.5, 'x','o'))}
rownames(symbol)<-0:(ar.max-1)
colnames(symbol)<-0:(ma.max-1)
cat('AR/MA\n')
print(symbol,quote=FALSE)
invisible(list(eacf=eacfm,ar.max=ar.max,ma.ma=ma.max,symbol=symbol))
}
eacf(train_weekly)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x x x x x x x x x x  o  o  o 
## 1 x x o o o o o x x o x  o  o  o 
## 2 x x o o o o o o x x x  o  o  o 
## 3 x x o o o o o o o o o  o  o  o 
## 4 x x x o o o o o o o o  o  o  o 
## 5 x x x x o o o o o o o  o  o  o 
## 6 x x o x o o o o o o o  o  o  o 
## 7 x x o x o x o o o o o  x  o  o
# p <- 1,2,3
# q <- 2,3,4
# Forecast the SARIMA for (2021-01 to 2023-07)
h <- length(test_weekly)

model_sarima_weekly <- auto.arima(train_weekly, 
                                  seasonal = TRUE, 
                                  stepwise = FALSE,
                                  trace = FALSE,
                                  lambda="auto",
                                  start.p = 1, 
                                  max.p = 3,
                                  start.q = 2, 
                                  max.q = 4)

forecast_sarima_weekly <- forecast(model_sarima_weekly, h)
plot(forecast_sarima_weekly)

# Plot the forecasts for SARIMA model
plot(pm2_5_weekly_ts, 
     xlim=c(1999, 2023), 
     ylim=c(min(pm2_5_weekly_ts), 
            max(pm2_5_weekly_ts)), 
     main="Train, Test, and Forecasted Data - Weekly", 
     xlab="Year", 
     ylab="PM2.5 AQI Value")
lines(test_weekly, col="red") # Test data in red
lines(forecast_sarima_weekly$mean, col="blue", type="o") # Forecasted data in blue
legend("topleft", 
       legend=c("Train", "Test", "Forecast"), 
       fill=c("black", "red", "blue"))

# Compare the forecast with the actual test data by calculating MAPE and MSE
# Symmetric Mean Absolute Percentage Error (sMAPE)
smape_sarima_weekly <- smape(test_weekly, forecast_sarima_weekly$mean)
smape_sarima_weekly
## [1] 0.1505205
# Mean Absolute Error (MAE)
mae_sarima_weekly <- mean(abs((test_weekly - forecast_sarima_weekly$mean)))
mae_sarima_weekly
## [1] 9.323144
# Root Mean Squared Error (RMSE)
rmse_sarima_weekly <- rmse(test_weekly, forecast_sarima_weekly$mean)
rmse_sarima_weekly
## [1] 13.00341
# Evaluate the residuals
checkresiduals(forecast_sarima_weekly)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,2)(0,0,2)[52]
## Q* = 170.19, df = 99.36, p-value = 1.271e-05
## 
## Model df: 5.   Total lags used: 104.36
comparison_df <- data.frame(
  Date = time(test_weekly),
  Actual = as.vector(test_weekly),
  Forecasted = as.vector(forecast_sarima_weekly$mean)
)
print(comparison_df)
##         Date    Actual Forecasted
## 1   2021.001  91.57143   69.64093
## 2   2021.020  83.28571   68.57073
## 3   2021.039  77.28571   66.31421
## 4   2021.058  52.57143   62.64516
## 5   2021.077  46.42857   64.01551
## 6   2021.097  65.57143   63.88958
## 7   2021.116  64.57143   60.42603
## 8   2021.135  54.57143   59.75798
## 9   2021.154  58.42857   61.87282
## 10  2021.173  52.14286   61.98156
## 11  2021.192  43.85714   59.51121
## 12  2021.212  45.14286   60.08813
## 13  2021.231  50.85714   59.82784
## 14  2021.250  60.42857   60.55846
## 15  2021.269  63.14286   59.27270
## 16  2021.288  62.71429   60.27206
## 17  2021.307  55.00000   61.44740
## 18  2021.327  54.00000   63.33185
## 19  2021.346  68.14286   62.33491
## 20  2021.365  62.85714   60.03415
## 21  2021.384  56.14286   61.03920
## 22  2021.403  63.14286   59.86888
## 23  2021.422  60.71429   61.89483
## 24  2021.442  52.71429   62.59218
## 25  2021.461  67.28571   62.66325
## 26  2021.480  55.85714   60.88138
## 27  2021.499  73.85714   62.18989
## 28  2021.518  81.85714   65.09308
## 29  2021.537  61.71429   63.37980
## 30  2021.557  68.85714   62.38472
## 31  2021.576  59.14286   62.66551
## 32  2021.595  63.28571   63.02392
## 33  2021.614  58.14286   62.39102
## 34  2021.633  64.00000   63.10910
## 35  2021.652  71.42857   63.07673
## 36  2021.672  69.42857   62.36332
## 37  2021.691  63.42857   62.24822
## 38  2021.710  62.85714   64.06162
## 39  2021.729  70.14286   62.82165
## 40  2021.748  57.85714   61.32171
## 41  2021.767  52.57143   63.30559
## 42  2021.787  67.00000   65.89231
## 43  2021.806  50.14286   63.00507
## 44  2021.825  61.00000   62.36967
## 45  2021.844 107.14286   64.48247
## 46  2021.863  73.42857   64.93316
## 47  2021.882  98.85714   65.08945
## 48  2021.901  63.42857   63.10088
## 49  2021.921 125.71429   64.15323
## 50  2021.940  82.57143   63.59650
## 51  2021.959  65.00000   64.60115
## 52  2021.978  74.42857   62.39163
## 53  2021.997  58.14286   62.14771
## 54  2022.016  83.71429   64.76254
## 55  2022.036  64.42857   63.15716
## 56  2022.055  64.57143   64.38684
## 57  2022.074  63.71429   64.38337
## 58  2022.093  69.57143   62.81888
## 59  2022.112  64.71429   62.02881
## 60  2022.131  58.85714   62.88325
## 61  2022.151  49.14286   63.66549
## 62  2022.170  55.42857   62.07108
## 63  2022.189  56.14286   61.26165
## 64  2022.208  51.71429   58.19944
## 65  2022.227  57.42857   58.53291
## 66  2022.246  50.57143   57.71768
## 67  2022.266  63.42857   61.10544
## 68  2022.285  44.57143   57.04014
## 69  2022.304  43.85714   60.31293
## 70  2022.323  62.71429   61.80743
## 71  2022.342  61.14286   63.00140
## 72  2022.361  49.28571   63.13848
## 73  2022.381  56.42857   61.12932
## 74  2022.400  58.85714   61.76994
## 75  2022.419  60.14286   62.99788
## 76  2022.438  61.28571   61.57881
## 77  2022.457  56.28571   61.28201
## 78  2022.476  62.00000   62.16017
## 79  2022.496  59.14286   62.23480
## 80  2022.515  76.14286   65.20355
## 81  2022.534  56.14286   63.33534
## 82  2022.553  57.85714   62.17924
## 83  2022.572  53.00000   62.15819
## 84  2022.591  50.28571   62.97069
## 85  2022.611  56.85714   62.86428
## 86  2022.630  57.42857   64.43846
## 87  2022.649  56.85714   66.18033
## 88  2022.668  62.28571   62.62236
## 89  2022.687  53.00000   63.64845
## 90  2022.706  50.85714   68.26799
## 91  2022.726  40.71429   67.00969
## 92  2022.745  56.71429   64.35512
## 93  2022.764  65.14286   65.49149
## 94  2022.783  58.00000   65.37046
## 95  2022.802  54.42857   63.88099
## 96  2022.821  58.85714   63.56436
## 97  2022.841  52.85714   66.03499
## 98  2022.860  50.57143   63.76088
## 99  2022.879  64.42857   63.72964
## 100 2022.898  77.71429   64.14660
## 101 2022.917  60.28571   64.61235
## 102 2022.936  54.28571   65.69197
## 103 2022.956  61.28571   64.06401
## 104 2022.975  99.00000   64.06332
## 105 2022.994  51.42857   63.66508
## 106 2023.013  48.42857   63.48115
## 107 2023.032  51.00000   63.35319
## 108 2023.051  58.00000   63.26406
## 109 2023.071  61.85714   63.20193
## 110 2023.090  60.14286   63.15860
## 111 2023.109  54.71429   63.12837
## 112 2023.128  55.42857   63.10726
## 113 2023.147  47.71429   63.09253
## 114 2023.166  48.14286   63.08224
## 115 2023.186  39.42857   63.07506
## 116 2023.205  48.57143   63.07005
## 117 2023.224  42.42857   63.06654
## 118 2023.243  49.14286   63.06410
## 119 2023.262  49.14286   63.06239
## 120 2023.281  63.85714   63.06120
## 121 2023.300  61.85714   63.06036
## 122 2023.320  70.28571   63.05978
## 123 2023.339  31.71429   63.05938
## 124 2023.358  52.42857   63.05909
## 125 2023.377  63.71429   63.05889
## 126 2023.396  44.57143   63.05876
## 127 2023.415  46.00000   63.05866
## 128 2023.435  38.42857   63.05859
## 129 2023.454  46.85714   63.05854
## 130 2023.473  50.85714   63.05851
## 131 2023.492  62.14286   63.05849
## 132 2023.511  88.85714   63.05847
## 133 2023.530  58.71429   63.05846
## 134 2023.550  60.42857   63.05845
## 135 2023.569  65.14286   63.05845
## 136 2023.588  62.14286   63.05844
## 137 2023.607  54.60000   63.05844
Box.test(residuals(forecast_sarima_weekly), lag = 52, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(forecast_sarima_weekly)
## X-squared = 84.274, df = 52, p-value = 0.003075

Step 10: Forecast using TBATS - Weekly

# Forecast the TBATS for (2021-01 to 2023-07)
h <- length(test_weekly)

vec <- as.vector(train_weekly)
univariate_ts <- ts(vec, start=1999, frequency=52.18)
plot(stl(univariate_ts, s.window="periodic"))

model_tbats_weekly <- tbats(train_weekly,
                            use.box.cox = TRUE)
forecast_tbats_weekly <- forecast(model_tbats_weekly, h)
plot(forecast_tbats_weekly)

# Plot the forecasts for TBATS model
plot(pm2_5_weekly_ts, 
     xlim=c(1999, 2023), 
     ylim=c(min(pm2_5_weekly_ts), 
            max(pm2_5_weekly_ts)), 
     main="Train, Test, and Forecasted Data - Weekly", 
     xlab="Year", 
     ylab="PM2.5 AQI Value")
lines(test_weekly, col="red") # Test data in red
lines(forecast_tbats_weekly$mean, col="blue", type="o") # Forecasted data in blue
legend("topleft", 
       legend=c("Train", "Test", "Forecast"), 
       fill=c("black", "red", "blue"))

# Compare the forecast with the actual test data by calculating MAPE and MSE
# Symmetric Mean Absolute Percentage Error (sMAPE)
smape_tbats_weekly <- smape(test_weekly, forecast_tbats_weekly$mean)
smape_tbats_weekly
## [1] 0.1485914
# Mean Absolute Error (MAE)
mae_tbats_weekly <- mean(abs((test_weekly - forecast_tbats_weekly$mean)))
mae_tbats_weekly
## [1] 9.247983
# Root Mean Squared Error (RMSE)
rmse_tbats_weekly <- rmse(test_weekly, forecast_tbats_weekly$mean)
rmse_tbats_weekly
## [1] 12.32599
# Evaluate the residuals
checkresiduals(forecast_tbats_weekly)

## 
##  Ljung-Box test
## 
## data:  Residuals from TBATS(0, {0,3}, 0.886, {<52.18,7>})
## Q* = 128.92, df = 104.36, p-value = 0.05179
## 
## Model df: 0.   Total lags used: 104.36
comparison_df <- data.frame(
  Date = time(test_weekly),
  Actual = as.vector(test_weekly),
  Forecasted = as.vector(forecast_tbats_weekly$mean)
)
print(comparison_df)
##         Date    Actual Forecasted
## 1   2021.001  91.57143   79.76285
## 2   2021.020  83.28571   81.90778
## 3   2021.039  77.28571   80.15252
## 4   2021.058  52.57143   75.73405
## 5   2021.077  46.42857   69.93137
## 6   2021.097  65.57143   64.53847
## 7   2021.116  64.57143   60.93566
## 8   2021.135  54.57143   59.52249
## 9   2021.154  58.42857   59.74437
## 10  2021.173  52.14286   60.36729
## 11  2021.192  43.85714   60.06514
## 12  2021.212  45.14286   58.25418
## 13  2021.231  50.85714   55.52292
## 14  2021.250  60.42857   53.17368
## 15  2021.269  63.14286   52.36580
## 16  2021.288  62.71429   53.55979
## 17  2021.307  55.00000   56.32865
## 18  2021.327  54.00000   59.41406
## 19  2021.346  68.14286   61.25017
## 20  2021.365  62.85714   60.99757
## 21  2021.384  56.14286   59.24536
## 22  2021.403  63.14286   57.57820
## 23  2021.422  60.71429   57.52748
## 24  2021.442  52.71429   59.84829
## 25  2021.461  67.28571   64.22669
## 26  2021.480  55.85714   69.20070
## 27  2021.499  73.85714   72.60041
## 28  2021.518  81.85714   72.82056
## 29  2021.537  61.71429   70.03912
## 30  2021.557  68.85714   66.01692
## 31  2021.576  59.14286   62.75970
## 32  2021.595  63.28571   61.47326
## 33  2021.614  58.14286   62.33043
## 34  2021.633  64.00000   64.64065
## 35  2021.652  71.42857   67.16719
## 36  2021.672  69.42857   68.67033
## 37  2021.691  63.42857   68.55306
## 38  2021.710  62.85714   67.13541
## 39  2021.729  70.14286   65.32834
## 40  2021.748  57.85714   64.06076
## 41  2021.767  52.57143   63.90400
## 42  2021.787  67.00000   64.98187
## 43  2021.806  50.14286   67.02575
## 44  2021.825  61.00000   69.47144
## 45  2021.844 107.14286   71.60396
## 46  2021.863  73.42857   72.79015
## 47  2021.882  98.85714   72.76263
## 48  2021.901  63.42857   71.80027
## 49  2021.921 125.71429   70.64740
## 50  2021.940  82.57143   70.18538
## 51  2021.959  65.00000   71.02913
## 52  2021.978  74.42857   73.19265
## 53  2021.997  58.14286   75.89627
## 54  2022.016  83.71429   77.68823
## 55  2022.036  64.42857   77.12026
## 56  2022.055  64.57143   73.75393
## 57  2022.074  63.71429   68.59693
## 58  2022.093  69.57143   63.44149
## 59  2022.112  64.71429   59.78428
## 60  2022.131  58.85714   58.21952
## 61  2022.151  49.14286   58.38302
## 62  2022.170  55.42857   59.16884
## 63  2022.189  56.14286   59.23610
## 64  2022.208  51.71429   57.82145
## 65  2022.227  57.42857   55.30064
## 66  2022.246  50.57143   52.89422
## 67  2022.266  63.42857   51.83215
## 68  2022.285  44.57143   52.72377
## 69  2022.304  43.85714   55.31528
## 70  2022.323  62.71429   58.49065
## 71  2022.342  61.14286   60.68321
## 72  2022.361  49.28571   60.84876
## 73  2022.381  56.42857   59.31154
## 74  2022.400  58.85714   57.54305
## 75  2022.419  60.14286   57.14814
## 76  2022.438  61.28571   59.05077
## 77  2022.457  56.28571   63.14531
## 78  2022.476  62.00000   68.17263
## 79  2022.496  59.14286   72.02334
## 80  2022.515  76.14286   72.87993
## 81  2022.534  56.14286   70.56173
## 82  2022.553  57.85714   66.62489
## 83  2022.572  53.00000   63.12852
## 84  2022.591  50.28571   61.45866
## 85  2022.611  56.85714   61.96773
## 86  2022.630  57.42857   64.10343
## 87  2022.649  56.85714   66.69659
## 88  2022.668  62.28571   68.45928
## 89  2022.687  53.00000   68.64162
## 90  2022.706  50.85714   67.40666
## 91  2022.726  40.71429   65.60068
## 92  2022.745  56.71429   64.18580
## 93  2022.764  65.14286   63.81107
## 94  2022.783  58.00000   64.67913
## 95  2022.802  54.42857   66.58590
## 96  2022.821  58.85714   69.01204
## 97  2022.841  52.85714   71.25557
## 98  2022.860  50.57143   72.64784
## 99  2022.879  64.42857   72.83669
## 100 2022.898  77.71429   71.99981
## 101 2022.917  60.28571   70.81647
## 102 2022.936  54.28571   70.17340
## 103 2022.956  61.28571   70.76115
## 104 2022.975  99.00000   72.72148
## 105 2022.994  51.42857   75.42154
## 106 2023.013  48.42857   77.49853
## 107 2023.032  51.00000   77.42970
## 108 2023.051  58.00000   74.53271
## 109 2023.071  61.85714   69.57444
## 110 2023.090  60.14286   64.28845
## 111 2023.109  54.71429   60.28911
## 112 2023.128  55.42857   58.34891
## 113 2023.147  47.71429   58.26621
## 114 2023.166  48.14286   59.04205
## 115 2023.186  39.42857   59.32318
## 116 2023.205  48.57143   58.18239
## 117 2023.224  42.42857   55.78515
## 118 2023.243  49.14286   53.25692
## 119 2023.262  49.14286   51.88511
## 120 2023.281  63.85714   52.41753
## 121 2023.300  61.85714   54.76253
## 122 2023.320  70.28571   57.94557
## 123 2023.339  31.71429   60.42250
## 124 2023.358  52.42857   60.97324
## 125 2023.377  63.71429   59.65558
## 126 2023.396  44.57143   57.80028
## 127 2023.415  46.00000   57.06366
## 128 2023.435  38.42857   58.52809
## 129 2023.454  46.85714   62.28755
## 130 2023.473  50.85714   67.28797
## 131 2023.492  62.14286   71.51375
## 132 2023.511  88.85714   72.98177
## 133 2023.530  58.71429   71.15887
## 134 2023.550  60.42857   67.35630
## 135 2023.569  65.14286   63.64954
## 136 2023.588  62.14286   61.59527
## 137 2023.607  54.60000   61.72969
Box.test(residuals(forecast_tbats_weekly), lag = 52, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(forecast_tbats_weekly)
## X-squared = 59.87, df = 52, p-value = 0.2117

Step 11: Forecast using Neural Network - Weekly

h <- length(test_weekly)
model_nn_weekly <- nnetar(train_weekly,
                          size=10,
                          decay=0.5,
                          repeats=1000,
                          lambda="auto")
summary(model_nn_weekly)
##           Length Class        Mode     
## x         1147   ts           numeric  
## m            1   -none-       numeric  
## p            1   -none-       numeric  
## P            1   -none-       numeric  
## scalex       2   -none-       list     
## size         1   -none-       numeric  
## lambda       1   -none-       numeric  
## subset    1147   -none-       numeric  
## model     1000   nnetarmodels list     
## nnetargs     1   -none-       list     
## fitted    1147   ts           numeric  
## residuals 1147   ts           numeric  
## lags        19   -none-       numeric  
## series       1   -none-       character
## method       1   -none-       character
## call         6   -none-       call
forecast_nn_weekly <- forecast(model_nn_weekly, PI=FALSE, h)
plot(forecast_nn_weekly)

# Plot the forecasts for NN model
plot(pm2_5_weekly_ts, 
     xlim=c(1999, 2023), 
     ylim=c(min(pm2_5_weekly_ts), 
            max(pm2_5_weekly_ts)), 
     main="Train, Test, and Forecasted Data - Weekly", 
     xlab="Year", 
     ylab="PM2.5 AQI Value")
lines(test_weekly, col="red") # Test data in red
lines(forecast_nn_weekly$mean, col="blue", type="o") # Forecasted data in blue
legend("topleft", 
       legend=c("Train", "Test", "Forecast"), 
       fill=c("black", "red", "blue"))

# Compare the forecast with the actual test data by calculating MAPE and MSE
# Symmetric Mean Absolute Percentage Error (sMAPE)
smape_nn_weekly <- smape(test_weekly, forecast_nn_weekly$mean)
smape_nn_weekly
## [1] 0.1993593
# Mean Absolute Error (MAE)
mae_nn_weekly <- mean(abs((test_weekly - forecast_nn_weekly$mean)))
mae_nn_weekly
## [1] 12.66333
# Root Mean Squared Error (RMSE)
rmse_nn_weekly <- rmse(test_weekly, forecast_nn_weekly$mean)
rmse_nn_weekly
## [1] 15.81057
# Evaluate the residuals
checkresiduals(forecast_nn_weekly)

## 
##  Ljung-Box test
## 
## data:  Residuals from NNAR(18,1,10)[52]
## Q* = 108.23, df = 104.36, p-value = 0.378
## 
## Model df: 0.   Total lags used: 104.36
comparison_df <- data.frame(
  Date = time(test_weekly),
  Actual = as.vector(test_weekly),
  Forecasted = as.vector(forecast_nn_weekly$mean)
)
print(comparison_df)
##         Date    Actual Forecasted
## 1   2021.001  91.57143   68.42405
## 2   2021.020  83.28571   72.29554
## 3   2021.039  77.28571   71.33081
## 4   2021.058  52.57143   67.27083
## 5   2021.077  46.42857   63.83419
## 6   2021.097  65.57143   66.48513
## 7   2021.116  64.57143   65.74059
## 8   2021.135  54.57143   65.24787
## 9   2021.154  58.42857   64.63117
## 10  2021.173  52.14286   63.25110
## 11  2021.192  43.85714   63.36738
## 12  2021.212  45.14286   62.00032
## 13  2021.231  50.85714   60.91066
## 14  2021.250  60.42857   60.25724
## 15  2021.269  63.14286   58.29467
## 16  2021.288  62.71429   55.93591
## 17  2021.307  55.00000   53.97840
## 18  2021.327  54.00000   54.07835
## 19  2021.346  68.14286   56.57009
## 20  2021.365  62.85714   57.71174
## 21  2021.384  56.14286   54.13220
## 22  2021.403  63.14286   53.33300
## 23  2021.422  60.71429   55.11071
## 24  2021.442  52.71429   54.40803
## 25  2021.461  67.28571   53.22639
## 26  2021.480  55.85714   53.09114
## 27  2021.499  73.85714   53.21331
## 28  2021.518  81.85714   61.48821
## 29  2021.537  61.71429   60.53609
## 30  2021.557  68.85714   58.56270
## 31  2021.576  59.14286   58.01577
## 32  2021.595  63.28571   58.81724
## 33  2021.614  58.14286   59.78005
## 34  2021.633  64.00000   61.46103
## 35  2021.652  71.42857   66.35241
## 36  2021.672  69.42857   64.47063
## 37  2021.691  63.42857   64.72433
## 38  2021.710  62.85714   69.40620
## 39  2021.729  70.14286   70.71035
## 40  2021.748  57.85714   69.35818
## 41  2021.767  52.57143   68.83924
## 42  2021.787  67.00000   68.79584
## 43  2021.806  50.14286   68.77731
## 44  2021.825  61.00000   70.26783
## 45  2021.844 107.14286   70.77894
## 46  2021.863  73.42857   71.10340
## 47  2021.882  98.85714   70.95338
## 48  2021.901  63.42857   70.71238
## 49  2021.921 125.71429   71.45600
## 50  2021.940  82.57143   74.20331
## 51  2021.959  65.00000   73.30197
## 52  2021.978  74.42857   73.54241
## 53  2021.997  58.14286   73.34352
## 54  2022.016  83.71429   74.29803
## 55  2022.036  64.42857   74.29355
## 56  2022.055  64.57143   73.48051
## 57  2022.074  63.71429   72.59707
## 58  2022.093  69.57143   72.62417
## 59  2022.112  64.71429   72.17622
## 60  2022.131  58.85714   72.14287
## 61  2022.151  49.14286   71.83840
## 62  2022.170  55.42857   71.07127
## 63  2022.189  56.14286   70.47669
## 64  2022.208  51.71429   69.89991
## 65  2022.227  57.42857   69.48774
## 66  2022.246  50.57143   69.08552
## 67  2022.266  63.42857   68.00863
## 68  2022.285  44.57143   67.18631
## 69  2022.304  43.85714   66.35223
## 70  2022.323  62.71429   65.81553
## 71  2022.342  61.14286   65.59211
## 72  2022.361  49.28571   65.33097
## 73  2022.381  56.42857   64.50411
## 74  2022.400  58.85714   63.89400
## 75  2022.419  60.14286   63.59363
## 76  2022.438  61.28571   63.21878
## 77  2022.457  56.28571   62.59149
## 78  2022.476  62.00000   61.83230
## 79  2022.496  59.14286   61.16213
## 80  2022.515  76.14286   62.13027
## 81  2022.534  56.14286   62.52325
## 82  2022.553  57.85714   62.31969
## 83  2022.572  53.00000   61.96955
## 84  2022.591  50.28571   61.90931
## 85  2022.611  56.85714   62.34546
## 86  2022.630  57.42857   62.98371
## 87  2022.649  56.85714   64.47891
## 88  2022.668  62.28571   65.00678
## 89  2022.687  53.00000   65.43970
## 90  2022.706  50.85714   66.93743
## 91  2022.726  40.71429   68.33574
## 92  2022.745  56.71429   69.23001
## 93  2022.764  65.14286   69.78198
## 94  2022.783  58.00000   70.28739
## 95  2022.802  54.42857   70.59542
## 96  2022.821  58.85714   71.02850
## 97  2022.841  52.85714   71.30843
## 98  2022.860  50.57143   71.51944
## 99  2022.879  64.42857   71.68420
## 100 2022.898  77.71429   71.68080
## 101 2022.917  60.28571   71.96329
## 102 2022.936  54.28571   72.66870
## 103 2022.956  61.28571   72.97898
## 104 2022.975  99.00000   73.37952
## 105 2022.994  51.42857   73.87644
## 106 2023.013  48.42857   74.66916
## 107 2023.032  51.00000   75.29768
## 108 2023.051  58.00000   75.62237
## 109 2023.071  61.85714   75.78753
## 110 2023.090  60.14286   76.08720
## 111 2023.109  54.71429   76.20071
## 112 2023.128  55.42857   76.42293
## 113 2023.147  47.71429   76.55767
## 114 2023.166  48.14286   76.47483
## 115 2023.186  39.42857   76.31142
## 116 2023.205  48.57143   76.10101
## 117 2023.224  42.42857   75.95022
## 118 2023.243  49.14286   75.76482
## 119 2023.262  49.14286   75.19997
## 120 2023.281  63.85714   74.62897
## 121 2023.300  61.85714   74.02609
## 122 2023.320  70.28571   73.44423
## 123 2023.339  31.71429   72.94484
## 124 2023.358  52.42857   72.42532
## 125 2023.377  63.71429   71.81203
## 126 2023.396  44.57143   71.24608
## 127 2023.415  46.00000   70.72593
## 128 2023.435  38.42857   70.29066
## 129 2023.454  46.85714   69.82849
## 130 2023.473  50.85714   69.32929
## 131 2023.492  62.14286   68.87550
## 132 2023.511  88.85714   68.69950
## 133 2023.530  58.71429   68.55597
## 134 2023.550  60.42857   68.35906
## 135 2023.569  65.14286   68.11365
## 136 2023.588  62.14286   67.92758
## 137 2023.607  54.60000   67.86730
Box.test(residuals(forecast_nn_weekly), lag = 52, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(forecast_nn_weekly)
## X-squared = 46.689, df = 52, p-value = 0.6821